import geopandas as gpd
import tobler
import pyinterpolate
import numpy as np
from libpysal import graph
from sklearn import neighbors
from scipy import interpolateSpatial interpolation
The contents will appear later
This section will contain hands-on material that will appear later.
Area interpolation
simd = gpd.read_file(
"https://martinfleischmann.net/sds/chapter_09/data/edinburgh_simd_2020.gpkg"
)
simd.head(2)| DataZone | DZName | LAName | SAPE2017 | WAPE2017 | Rankv2 | Quintilev2 | Decilev2 | Vigintilv2 | Percentv2 | ... | CrimeRate | CrimeRank | HouseNumOC | HouseNumNC | HouseOCrat | HouseNCrat | HouseRank | Shape_Leng | Shape_Area | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | S01008417 | Balerno and Bonnington Village - 01 | City of Edinburgh | 708 | 397 | 5537 | 4 | 8 | 16 | 80 | ... | 86 | 5392.0 | 17 | 8 | 2% | 1% | 6350.0 | 20191.721420 | 1.029993e+07 | POLYGON ((315157.369 666212.846, 315173.727 66... |
| 1 | S01008418 | Balerno and Bonnington Village - 02 | City of Edinburgh | 691 | 378 | 6119 | 5 | 9 | 18 | 88 | ... | 103 | 5063.0 | 7 | 10 | 1% | 1% | 6650.0 | 25944.861787 | 2.357050e+07 | POLYGON ((317816.000 666579.000, 318243.000 66... |
2 rows × 52 columns
simd[["EmpRate", "geometry"]].explore("EmpRate", tiles="CartoDB Positron")Make this Notebook Trusted to load map: File -> Trust Notebook
grid_8 = tobler.util.h3fy(simd, resolution=8)
grid_8.head(2)/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/pyproj/crs/crs.py:1293: UserWarning: You will likely lose important projection information when converting to a PROJ string from another format. See: https://proj.org/faq.html#what-is-the-best-format-for-describing-coordinate-reference-systems
proj = self._crs.to_proj4(version=version)
| geometry | |
|---|---|
| hex_id | |
| 8819727761fffff | POLYGON ((315895.903 677993.995, 315431.809 67... |
| 88197270d7fffff | POLYGON ((313110.882 677234.253, 312646.604 67... |
m = simd.boundary.explore(tiles="CartoDB Positron")
grid_8.boundary.explore(m=m, color="red")Make this Notebook Trusted to load map: File -> Trust Notebook
overlay
interpolated = tobler.area_weighted.area_interpolate(
simd,
grid_8,
extensive_variables=["EmpNumDep", "IncNumDep"],
intensive_variables=["EmpRate", "IncRate"],
)interpolated.explore("EmpRate", tiles="CartoDB Positron")Make this Notebook Trusted to load map: File -> Trust Notebook
Point interpolation
airbnb = gpd.read_file(
"https://martinfleischmann.net/sds/chapter_09/data/edinburgh_airbnb_2023.gpkg"
)
airbnb.head()| id | bedrooms | property_type | price | geometry | |
|---|---|---|---|---|---|
| 0 | 15420 | 1.0 | Entire rental unit | $126.00 | POINT (325921.137 674478.931) |
| 1 | 790170 | 2.0 | Entire condo | $269.00 | POINT (325976.360 677655.252) |
| 2 | 24288 | 2.0 | Entire loft | $95.00 | POINT (326069.186 673072.913) |
| 3 | 821573 | 2.0 | Entire rental unit | $172.00 | POINT (326748.646 674001.683) |
| 4 | 822829 | 3.0 | Entire rental unit | $361.00 | POINT (325691.831 674328.127) |
airbnb.price.head()0 $126.00
1 $269.00
2 $95.00
3 $172.00
4 $361.00
Name: price, dtype: object
airbnb["price_float"] = airbnb.price.str.strip("$").str.replace(",", "").astype(float)two_bed_homes = airbnb[
(airbnb["bedrooms"] == 2)
& (airbnb["property_type"] == "Entire rental unit")
& (airbnb["price_float"] < 300)
].copy()
two_bed_homes.head()| id | bedrooms | property_type | price | geometry | price_float | |
|---|---|---|---|---|---|---|
| 3 | 821573 | 2.0 | Entire rental unit | $172.00 | POINT (326748.646 674001.683) | 172.0 |
| 5 | 834777 | 2.0 | Entire rental unit | $264.00 | POINT (324950.724 673875.598) | 264.0 |
| 6 | 450745 | 2.0 | Entire rental unit | $177.00 | POINT (326493.725 672853.904) | 177.0 |
| 10 | 485856 | 2.0 | Entire rental unit | $157.00 | POINT (326597.124 673869.551) | 157.0 |
| 17 | 51505 | 2.0 | Entire rental unit | $155.00 | POINT (325393.807 674177.409) | 155.0 |
two_bed_homes.geometry.duplicated().any()True
two_bed_homes = two_bed_homes.drop_duplicates("geometry")two_bed_homes.explore("price_float", tiles="CartoDB Positron")Make this Notebook Trusted to load map: File -> Trust Notebook
from libpysal import graph
knn = graph.Graph.build_kernel(two_bed_homes, k=10).transform("r")
two_bed_homes["price_lag"] = knn.lag(two_bed_homes.price_float)
two_bed_homes.explore("price_lag", tiles="CartoDB Positron")Make this Notebook Trusted to load map: File -> Trust Notebook
Nearest
grid_10 = tobler.util.h3fy(simd, resolution=10)
len(grid_10)/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/pyproj/crs/crs.py:1293: UserWarning: You will likely lose important projection information when converting to a PROJ string from another format. See: https://proj.org/faq.html#what-is-the-best-format-for-describing-coordinate-reference-systems
proj = self._crs.to_proj4(version=version)
20162
grid_coordinates = grid_10.centroid.get_coordinates()
grid_coordinates.head()| x | y | |
|---|---|---|
| hex_id | ||
| 8a19727516e7fff | 314396.239638 | 668649.425957 |
| 8a1972775007fff | 319532.248509 | 675768.790051 |
| 8a197272bae7fff | 315166.141117 | 678976.818733 |
| 8a19727406b7fff | 318552.784356 | 669260.243895 |
| 8a1972393707fff | 327996.723476 | 670069.583024 |
coords = two_bed_homes.get_coordinates()
coords.head()| x | y | |
|---|---|---|
| 3 | 326748.645636 | 674001.683211 |
| 5 | 324950.723888 | 673875.598033 |
| 6 | 326493.725178 | 672853.903917 |
| 10 | 326597.123684 | 673869.551295 |
| 17 | 325393.807222 | 674177.408961 |
nearest = interpolate.griddata(
coords,
two_bed_homes.price_lag,
grid_coordinates,
method="nearest",
)
nearestarray([ 34. , 188.70398583, 106.14780144, ..., 188.70398583,
188.70398583, 84. ])
grid_10["nearest"] = nearest_ = grid_10.plot('nearest', legend=True)
ax = grid_10.plot('nearest', legend=True)
two_bed_homes.plot(ax=ax, color="red", markersize=1)<Axes: >
caption here

K-Nearest neighbours regression
Uniform
interpolation_uniform = neighbors.KNeighborsRegressor(n_neighbors=10, weights="uniform")interpolation_uniform.fit(
coords, two_bed_homes.price_lag
)KNeighborsRegressor(n_neighbors=10)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KNeighborsRegressor(n_neighbors=10)
price_on_grid = interpolation_uniform.predict(grid_coordinates)
price_on_gridarray([ 93.47717891, 130.09448585, 125.97986269, ..., 120.36697169,
120.36697169, 120.36697169])
grid_10["knn_uniform"] = price_on_grid_ = grid_10.plot("knn_uniform", legend=True)
Distance-weighted
interpolation_distance = neighbors.KNeighborsRegressor(n_neighbors=10, weights="distance")interpolation_distance.fit(
coords, two_bed_homes.price_lag
)KNeighborsRegressor(n_neighbors=10, weights='distance')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KNeighborsRegressor(n_neighbors=10, weights='distance')
grid_10["knn_distance"] = interpolation_distance.predict(grid_coordinates)_ = grid_10.plot("knn_distance", legend=True)
Distance band regression
interpolation_radius = neighbors.RadiusNeighborsRegressor(radius=1000, weights="distance")
interpolation_radius.fit(
coords, two_bed_homes.price_lag
)RadiusNeighborsRegressor(radius=1000, weights='distance')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RadiusNeighborsRegressor(radius=1000, weights='distance')
grid_10["radius"] = interpolation_radius.predict(grid_coordinates)/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/sklearn/neighbors/_regression.py:500: UserWarning: One or more samples have no neighbors within specified radius; predicting NaN.
warnings.warn(empty_warning_msg)
_ = grid_10.plot("radius", legend=True, missing_kwds={'color': 'lightgrey'})
Ordinary Kriging
input_data = np.hstack([coords, two_bed_homes.price_lag.values.reshape(-1, 1)])
input_dataarray([[3.26748646e+05, 6.74001683e+05, 2.07033397e+02],
[3.24950724e+05, 6.73875598e+05, 1.54805935e+02],
[3.26493725e+05, 6.72853904e+05, 1.43865293e+02],
...,
[3.28513265e+05, 6.74048892e+05, 1.06875409e+02],
[3.26840903e+05, 6.74767224e+05, 1.68848108e+02],
[3.25415664e+05, 6.73345158e+05, 2.15847334e+02]])
step_radius = 100 # meters
max_range = 5000 # meters
exp_semivar = pyinterpolate.build_experimental_variogram(
input_array=input_data,
step_size=step_radius,
max_range=max_range,
)exp_semivar.plot()
semivar = pyinterpolate.build_theoretical_variogram(
experimental_variogram=exp_semivar,
model_name='linear',
sill=exp_semivar.variance,
rang=5000
)semivar.plot()
ordinary_kriging = pyinterpolate.kriging(
observations=input_data,
theoretical_model=semivar,
points=grid_coordinates.values,
no_neighbors=10,
how="ok",
show_progress_bar=False,
)grid_10["ordinary_kriging"] = ordinary_kriging[:, 0]_ = grid_10.plot("ordinary_kriging", legend=True)
grid_10["variance_error"] = ordinary_kriging[:, 1]
_ = grid_10.plot("variance_error", legend=True)
Use larger distance
semivar_larger = pyinterpolate.build_theoretical_variogram(
experimental_variogram=exp_semivar,
model_name='linear',
sill=exp_semivar.variance,
rang=15000,
)ordinary_kriging_l = pyinterpolate.kriging(
observations=input_data,
theoretical_model=semivar_larger,
points=grid_coordinates.values,
no_neighbors=10,
how="ok",
show_progress_bar=False,
)grid_10["ok_larger"] = ordinary_kriging_l[:, 0]_ = grid_10.plot("ok_larger", legend=True)
grid_10["variance_error_l"] = ordinary_kriging_l[:, 1]
_ = grid_10.plot("variance_error_l", legend=True)